# Setup

DATA_PATH <- "data.dta"

OUT_DIR  <- "pk_outputs"
MAIN_DIR <- file.path(OUT_DIR, "main_text")
FIG_DIR  <- file.path(OUT_DIR, "main_figures")
AUX_DIR  <- file.path(OUT_DIR, "main_aux")

for (d in c(OUT_DIR, MAIN_DIR, FIG_DIR, AUX_DIR)) {
  if (!dir.exists(d)) dir.create(d, recursive = TRUE)
}

set.seed(1025)

LOG_PATH <- file.path(AUX_DIR, "run_log.txt")
sink(LOG_PATH, split = TRUE)
on.exit(sink(), add = TRUE)

message(strrep("=", 70))
message("PKMID JCR Main Replication Script v4.16")
message("Timestamp: ", format(Sys.time(), "%Y-%m-%d %H:%M:%S"))
message("ZINB inflation variables: cum_peace_10pp + high_igo + no_contig")
message("Analytic samples: loaded from analysis_data.dta")
message("Inference: 95% confidence intervals throughout")
message(strrep("=", 70))

# Packages

pkgs <- c(
  "haven", "dplyr", "tibble", "tidyr", "stringr",
  "ggplot2", "MASS", "pscl", "sandwich", "lmtest",
  "officer", "flextable", "cowplot", "scales"
)

to_install <- pkgs[!pkgs %in% rownames(installed.packages())]
if (length(to_install) > 0) install.packages(to_install, dependencies = TRUE)
invisible(lapply(pkgs, require, character.only = TRUE))

# Utilities

stop_if_missing <- function(path) {
  if (!file.exists(path)) stop("Required file not found: ", path)
}

`%||%` <- function(x, y) if (is.null(x)) y else x
ensure_numeric <- function(x) as.numeric(x)

mode_value <- function(x) {
  x <- x[!is.na(x)]
  if (!length(x)) return(NA)
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

BASE_FAMILY <- "serif"

theme_pub <- function(base_size = 11) {
  ggplot2::theme_classic(base_size = base_size, base_family = BASE_FAMILY) +
    ggplot2::theme(
      plot.title    = ggplot2::element_text(face = "bold", hjust = 0.5,
                                            size = base_size + 2),
      plot.subtitle = ggplot2::element_text(hjust = 0.5, size = base_size,
                                            color = "gray20"),
      plot.caption  = ggplot2::element_text(hjust = 0, size = base_size - 2,
                                            color = "gray40",
                                            margin = ggplot2::margin(t = 10)),
      axis.title.x  = ggplot2::element_text(margin = ggplot2::margin(t = 10),
                                            size = base_size),
      axis.title.y  = ggplot2::element_text(margin = ggplot2::margin(r = 10),
                                            size = base_size),
      axis.text     = ggplot2::element_text(size = base_size - 1, color = "black"),
      legend.title  = ggplot2::element_blank(),
      legend.position  = "bottom",
      legend.text   = ggplot2::element_text(size = base_size),
      panel.grid.major = ggplot2::element_blank(),
      panel.grid.minor = ggplot2::element_blank(),
      plot.margin   = ggplot2::margin(15, 20, 15, 20),
      axis.line     = ggplot2::element_line(color = "black", linewidth = 0.5)
    )
}

COL_NOTROOPS  <- "#A8C5D8"
COL_TROOPS    <- "#E8A0B4"
COL_POINT_1   <- "#4A7BA7"
COL_POINT_2   <- "#D64078"
COL_POINT_3   <- "#5A9E6F"
COL_NAIVE     <- "#6BAED6"
COL_CF        <- "#E377C2"

save_fig <- function(p, filename, w = 8.5, h = 5.5, dpi = 450) {
  out <- file.path(FIG_DIR, filename)
  ggplot2::ggsave(out, plot = p, width = w, height = h, units = "in",
                  dpi = dpi, bg = "white")
  message("  -> Figure saved: ", basename(out))
  invisible(out)
}

vcovCL_generic <- function(model, cluster) {
  U <- tryCatch(sandwich::estfun(model), error = function(e) NULL)
  B <- tryCatch(sandwich::bread(model),  error = function(e) NULL)
  if (is.null(U) || is.null(B)) return(NULL)
  cl   <- as.factor(cluster)
  if (length(cl) != nrow(U)) return(NULL)
  Ug   <- rowsum(U, group = cl, reorder = FALSE)
  meat <- crossprod(Ug)
  N <- nrow(U); K <- ncol(U); G <- nrow(Ug)
  adj <- if (G > 1) (G / (G - 1)) * ((N - 1) / (N - K)) else 1
  B %*% (adj * meat) %*% B
}

tidy_polr_robust <- function(model, level = 0.95) {
  b    <- stats::coef(model)
  nms  <- names(b)
  p    <- length(b)
  crit <- stats::qnorm(1 - (1 - level) / 2)
  
  ci <- tryCatch({
    suppressWarnings(stats::confint(model, level = level))
  }, error = function(e) {
    message("  [tidy_polr] confint() failed: ", conditionMessage(e))
    NULL
  })
  
  if (!is.null(ci)) {
    ci_slopes <- ci[nms, , drop = FALSE]
    if (all(is.finite(ci_slopes))) {
      se <- (ci_slopes[, 2] - ci_slopes[, 1]) / (2 * crit)
      z  <- b / se
      pv <- 2 * stats::pnorm(abs(z), lower.tail = FALSE)
      message("  [tidy_polr] Using profile-likelihood CIs.")
      return(tibble::tibble(
        term      = nms,
        estimate  = as.numeric(b),
        std.error = as.numeric(se),
        conf.low  = as.numeric(ci_slopes[, 1]),
        conf.high = as.numeric(ci_slopes[, 2]),
        p.value   = as.numeric(pv)
      ))
    }
    message("  [tidy_polr] confint() returned non-finite values; trying next method.")
  }
  
  V2 <- tryCatch({
    H <- model$Hessian %||% model$Hess
    if (is.null(H)) stop("No Hessian")
    H_slopes <- H[seq_len(p), seq_len(p), drop = FALSE]
    lambda <- max(abs(diag(H_slopes))) * 1e-6
    H_reg <- H_slopes - diag(lambda, p)
    Hinv <- solve(H_reg)
    if (any(diag(Hinv) < 0, na.rm = TRUE)) Hinv <- solve(-H_reg)
    if (any(diag(Hinv) < 0, na.rm = TRUE) || any(!is.finite(diag(Hinv)))) {
      Hinv <- MASS::ginv(-H_slopes)
      if (any(diag(Hinv) < 0, na.rm = TRUE)) Hinv <- MASS::ginv(H_slopes)
    }
    d <- diag(Hinv)
    if (all(is.finite(d)) && all(d >= 0)) {
      dimnames(Hinv) <- list(nms, nms)
      Hinv
    } else stop("Regularized Hessian still has invalid diagonal")
  }, error = function(e) {
    message("  [tidy_polr] Regularized Hessian failed: ", conditionMessage(e))
    NULL
  })
  
  if (!is.null(V2)) {
    se <- sqrt(diag(V2))
    z  <- b / se
    pv <- 2 * stats::pnorm(abs(z), lower.tail = FALSE)
    message("  [tidy_polr] Using regularized Hessian inverse.")
    return(tibble::tibble(
      term      = nms,
      estimate  = as.numeric(b),
      std.error = as.numeric(se),
      conf.low  = as.numeric(b - crit * se),
      conf.high = as.numeric(b + crit * se),
      p.value   = as.numeric(pv)
    ))
  }
  
  message("  [tidy_polr] All analytic methods failed. Computing bootstrap SEs (200 reps)...")
  mf <- model$model
  if (is.null(mf)) stop("polr model frame not available for bootstrapping.")
  n_boot <- 200
  boot_mat <- matrix(NA_real_, nrow = n_boot, ncol = p)
  colnames(boot_mat) <- nms
  fmla <- stats::formula(model)
  
  for (bb in seq_len(n_boot)) {
    idx <- sample(nrow(mf), replace = TRUE)
    boot_data <- mf[idx, , drop = FALSE]
    mb <- tryCatch(
      MASS::polr(fmla, data = boot_data, method = "logistic",
                 Hess = FALSE, model = FALSE),
      error = function(e) NULL
    )
    if (!is.null(mb) && length(stats::coef(mb)) == p) boot_mat[bb, ] <- stats::coef(mb)
  }
  
  valid_rows <- apply(boot_mat, 1, function(r) all(is.finite(r)))
  boot_mat <- boot_mat[valid_rows, , drop = FALSE]
  if (nrow(boot_mat) < 50) warning("Bootstrap produced only ", nrow(boot_mat),
                                   " valid replicates. SEs may be unreliable.")
  se <- apply(boot_mat, 2, sd, na.rm = TRUE)
  z  <- b / se
  pv <- 2 * stats::pnorm(abs(z), lower.tail = FALSE)
  message("  [tidy_polr] Using bootstrap SEs (", nrow(boot_mat), " valid reps).")
  
  tibble::tibble(
    term      = nms,
    estimate  = as.numeric(b),
    std.error = as.numeric(se),
    conf.low  = as.numeric(b - crit * se),
    conf.high = as.numeric(b + crit * se),
    p.value   = as.numeric(pv)
  )
}

vcovCL_safe <- function(model, cluster) {
  V <- tryCatch(sandwich::vcovCL(model, cluster = cluster, type = "HC1"),
                error = function(e) NULL)
  if (is.null(V)) V <- vcovCL_generic(model, cluster = cluster)
  if (is.null(V)) V <- tryCatch(stats::vcov(model), error = function(e) NULL)
  if (is.null(V)) stop("Cannot compute VCOV for class: ", paste(class(model), collapse = "/"))
  d <- diag(V)
  if (any(!is.finite(d)) || any(d < 0)) {
    V2 <- tryCatch(stats::vcov(model), error = function(e) NULL)
    if (!is.null(V2)) V <- V2
  }
  V
}

vcovCL_zinb <- function(model, cluster) {
  if (!inherits(model, "zeroinfl")) return(vcovCL_safe(model, cluster))
  
  V <- tryCatch({
    G <- model$gradient
    if (is.null(G)) stop("no gradient slot")
    if (nrow(G) != length(cluster)) stop("cluster length mismatch")
    B  <- tryCatch(sandwich::bread(model), error = function(e) NULL)
    if (is.null(B)) B <- tryCatch(-MASS::ginv(model$hessian), error = function(e) NULL)
    if (is.null(B)) stop("no bread")
    cl   <- as.factor(cluster)
    Ug   <- rowsum(G, group = cl, reorder = FALSE)
    meat <- crossprod(Ug)
    N <- nrow(G); K <- ncol(G); Gc <- nrow(Ug)
    adj <- if (Gc > 1) (Gc / (Gc - 1)) * ((N - 1) / (N - K)) else 1
    V_  <- B %*% (adj * meat) %*% B
    d   <- diag(V_)
    if (any(!is.finite(d)) || any(d < 0)) stop("non-finite or negative diagonal")
    V_
  }, error = function(e) {
    message("  [vcovCL_zinb] gradient path: ", conditionMessage(e))
    NULL
  })
  if (!is.null(V)) return(V)
  
  V <- tryCatch(sandwich::vcovCL(model, cluster = cluster, type = "HC1"),
                error = function(e) NULL)
  if (!is.null(V) && all(is.finite(diag(V))) && all(diag(V) >= 0)) return(V)
  
  V <- vcovCL_generic(model, cluster)
  if (!is.null(V) && all(is.finite(diag(V))) && all(diag(V) >= 0)) return(V)
  
  message("  [vcovCL_zinb] All robust paths failed; using model vcov().")
  tryCatch(stats::vcov(model),
           error = function(e) stop("Cannot compute VCOV for zeroinfl model."))
}

tidy_from_vcov <- function(model, V, level = 0.95) {
  b    <- stats::coef(model)
  se   <- sqrt(pmax(diag(V), 0))
  z    <- ifelse(se > 0, b / se, NA_real_)
  pv   <- ifelse(!is.na(z), 2 * stats::pnorm(abs(z), lower.tail = FALSE), NA_real_)
  crit <- stats::qnorm(1 - (1 - level) / 2)
  tibble::tibble(
    term      = names(b),
    estimate  = as.numeric(b),
    std.error = as.numeric(se),
    conf.low  = as.numeric(ifelse(se > 0, b - crit * se, NA_real_)),
    conf.high = as.numeric(ifelse(se > 0, b + crit * se, NA_real_)),
    p.value   = as.numeric(pv)
  )
}

stars <- function(p) {
  dplyr::case_when(
    is.na(p)  ~ "",
    p < 0.01  ~ "***",
    p < 0.05  ~ "**",
    p < 0.10  ~ "*",
    TRUE      ~ ""
  )
}

fmt_cell_ci <- function(est, lo, hi, p, se = NULL, digits = 3) {
  est <- est[1]; lo <- lo[1]; hi <- hi[1]; p <- p[1]
  if (!is.null(se)) se_val <- se[1] else se_val <- NULL
  if (is.na(est)) return("\u2014")
  star_str <- stars(p)
  if (!is.na(lo) && !is.na(hi) && is.finite(lo) && is.finite(hi)) {
    paste0(
      formatC(est, format = "f", digits = digits), star_str,
      "\n[", formatC(lo, format = "f", digits = digits), "; ",
      formatC(hi, format = "f", digits = digits), "]"
    )
  } else if (!is.null(se_val) && !is.na(se_val) && is.finite(se_val) && se_val > 0) {
    paste0(
      formatC(est, format = "f", digits = digits), star_str,
      "\n(", formatC(se_val, format = "f", digits = digits), ")"
    )
  } else {
    paste0(formatC(est, format = "f", digits = digits), star_str)
  }
}

fmt_cell_or <- function(beta, lo, hi, p, digits = 3) {
  beta <- beta[1]; lo <- lo[1]; hi <- hi[1]; p <- p[1]
  if (is.na(beta)) return("\u2014")
  star_str <- stars(p)
  if (!is.na(lo) && !is.na(hi) && is.finite(lo) && is.finite(hi)) {
    paste0(
      formatC(exp(beta), format = "f", digits = digits), star_str,
      "\n[", formatC(exp(lo), format = "f", digits = digits), "; ",
      formatC(exp(hi), format = "f", digits = digits), "]"
    )
  } else {
    paste0(formatC(exp(beta), format = "f", digits = digits), star_str)
  }
}

tidy_polr_slopes <- function(model, level = 0.95) tidy_polr_robust(model, level = level)

tidy_glm_cl <- function(model, data, level = 0.95) {
  td <- tidy_from_vcov(model, vcovCL_safe(model, cluster = data$cow), level)
  td[td$term != "(Intercept)", ]
}

tidy_zeroinfl_part <- function(model, data, part = c("count", "zero"), level = 0.95) {
  part   <- match.arg(part)
  prefix <- if (part == "count") "count_" else "zero_"
  crit   <- stats::qnorm(1 - (1 - level) / 2)
  
  b_slot <- if (part == "count") model$coefficients$count else model$coefficients$zero
  if (is.null(b_slot) || length(b_slot) == 0) {
    message("  [tidy_zeroinfl_part] $coefficients$", part, " is empty.")
    return(tibble::tibble())
  }
  b   <- as.numeric(b_slot)
  nms <- names(b_slot)
  p   <- length(b)
  
  se <- tryCatch({
    V  <- vcovCL_zinb(model, cluster = data$cow)
    rn <- rownames(V)
    
    idx_a <- grep(paste0("^", prefix), rn)
    idx_b <- which(rn %in% nms)
    n_count <- length(model$coefficients$count)
    n_zero  <- length(model$coefficients$zero)
    idx_c   <- if (part == "zero") seq(n_count + 1, n_count + n_zero) else seq_len(n_count)
    idx_c   <- idx_c[idx_c >= 1 & idx_c <= nrow(V)]
    
    idx <- if      (length(idx_a) == p) idx_a
    else if (length(idx_b) == p) idx_b
    else if (length(idx_c) == p) idx_c
    else {
      message("  [tidy_zeroinfl_part] Clustered VCOV block identification failed; using summary() SEs.")
      return(NULL)
    }
    
    se_cl <- sqrt(pmax(diag(V[idx, idx, drop = FALSE]), 0))
    if (any(!is.finite(se_cl))) {
      message("  [tidy_zeroinfl_part] Clustered VCOV has non-finite SEs; using summary() SEs.")
      return(NULL)
    }
    message("  [tidy_zeroinfl_part] Using cluster-robust SEs (", part, " part).")
    se_cl
    
  }, error = function(e) {
    message("  [tidy_zeroinfl_part] Clustered VCOV error: ", conditionMessage(e),
            " — using summary() SEs.")
    NULL
  })
  
  if (is.null(se)) {
    sm  <- tryCatch(summary(model)$coefficients[[part]], error = function(e) NULL)
    if (!is.null(sm) && "Std. Error" %in% colnames(sm)) {
      se_sm <- as.numeric(sm[, "Std. Error"])
      if (!is.null(rownames(sm)) && all(nms %in% rownames(sm))) {
        se_sm <- se_sm[match(nms, rownames(sm))]
      }
      se <- se_sm
      message("  [tidy_zeroinfl_part] Using summary() model-based SEs (", part, " part).")
    } else {
      message("  [tidy_zeroinfl_part] summary() also failed — returning NA SEs.")
      se <- rep(NA_real_, p)
    }
  }
  
  z  <- ifelse(!is.na(se) & se > 0, b / se, NA_real_)
  pv <- ifelse(!is.na(z), 2 * stats::pnorm(abs(z), lower.tail = FALSE), NA_real_)
  
  td <- tibble::tibble(
    term      = nms,
    estimate  = b,
    std.error = se,
    conf.low  = ifelse(!is.na(se) & se > 0, b - crit * se, NA_real_),
    conf.high = ifelse(!is.na(se) & se > 0, b + crit * se, NA_real_),
    p.value   = pv
  )
  td[td$term != "(Intercept)", ]
}

tidy_count_model <- function(model, data, level = 0.95) {
  if (inherits(model, "zeroinfl")) tidy_zeroinfl_part(model, data, "count", level)
  else {
    td <- tidy_glm_cl(model, data, level)
    td[td$term != "(Intercept)", ]
  }
}

tidy_inflate_model <- function(model, data, level = 0.95) {
  if (!inherits(model, "zeroinfl")) {
    message("  [tidy_inflate_model] model is not zeroinfl (class: ",
            paste(class(model), collapse = "/"), ") — no inflate table possible.")
    return(tibble::tibble())
  }
  tidy_zeroinfl_part(model, data, "zero", level)
}

model_N <- function(m) {
  out <- tryCatch(stats::nobs(m), error = function(e) NA_integer_)
  if (is.na(out)) out <- tryCatch(nrow(stats::model.frame(m)), error = function(e) NA_integer_)
  out
}

# Data

message("\nLoading analysis-ready data...")

stop_if_missing(DATA_PATH)
d0 <- haven::read_dta(DATA_PATH)

d0$troop_binary <- as.integer(d0$troop_binary)
d0$initiator    <- as.integer(d0$initiator)

HOSTLEV_LABS <- c("No Dispute", "No Military Action", "Threat",
                  "Display of Force", "Use of Force", "War")
d0$hostlev_f <- factor(d0$hostlev, levels = 0:5,
                       labels = HOSTLEV_LABS, ordered = TRUE)

IVS    <- c("troop_binary", "ln_max_total_peacekeeper", "logged_pk5years")
RC     <- c("lag_mid_bin", "ucdp_intrastate", "persistent_dispute", "ucdp_interstate_5yr")
CTRL   <- c("democracy_polity2", "ln_capability", "trade_openness", "igo_membership", "ln_gdp_cap")
REGION <- c("asia", "africa", "north_america", "south_america")
TPOLY  <- c("time", "time_squared", "time_cubed")

keep_present <- function(v) v[v %in% names(d0)]
RHS_COMMON   <- keep_present(c(RC, CTRL, REGION, TPOLY))
INFL_ZINB    <- keep_present(c("cum_peace_10pp", "high_igo", "no_contig"))

if (!all(c("sample_T1", "sample_T2", "sample_T3") %in% names(d0))) {
  stop("sample_T1, sample_T2, and sample_T3 must exist in data.dta.")
}
if (!"neverpk_dummy" %in% names(d0)) {
  stop("neverpk_dummy must exist in analysis_data.dta. Run 00_build_analysis_data.R first.")
}
if (length(INFL_ZINB) < 2) {
  stop("Fewer than two ZINB inflation variables found in analysis_data.dta.")
}

d_T1 <- d0[d0$sample_T1 == 1L, ]
d_T2 <- d0[d0$sample_T2 == 1L, ]
d_T3 <- d0[d0$sample_T3 == 1L, ]

message("  Loaded: N = ", nrow(d0), " rows")
message("  T1 N = ", nrow(d_T1), " | T2 N = ", nrow(d_T2), " | T3 N = ", nrow(d_T3))
message("  ZINB inflate: ", paste(INFL_ZINB, collapse = " + "))

# Models

rhs_str  <- function(iv) paste(c(iv, RHS_COMMON), collapse = " + ")
infl_str <- function(v)  paste(v, collapse = " + ")

fit_ologit <- function(iv, data) {
  MASS::polr(
    stats::as.formula(paste0("hostlev_f ~ ", rhs_str(iv))),
    data = data, method = "logistic", Hess = TRUE, model = TRUE
  )
}

fit_logit <- function(iv, data) {
  stats::glm(
    stats::as.formula(paste0("initiator ~ ", rhs_str(iv))),
    data = data, family = stats::binomial(link = "logit")
  )
}

fit_zinb <- function(iv, data) {
  f_count <- paste0("mid_sum ~ ", rhs_str(iv))
  infl    <- INFL_ZINB
  if (!length(infl)) {
    message("  No inflation variables available; estimating standard NB.")
    return(MASS::glm.nb(stats::as.formula(f_count), data = data,
                        control = stats::glm.control(maxit = 100)))
  }
  f_full <- paste0(f_count, " | ", infl_str(infl))
  message("  Formula: ", f_full)
  
  m <- tryCatch(
    pscl::zeroinfl(
      stats::as.formula(f_full),
      data = data, dist = "negbin", link = "logit",
      control = pscl::zeroinfl.control(maxit = 3000)
    ),
    error = function(e) {
      message("  ZINB failed for '", iv, "': ", conditionMessage(e))
      NULL
    }
  )
  if (!is.null(m)) {
    message("  Converged: inflate coefs [",
            paste(names(m$coefficients$zero), collapse = ", "), "]")
    return(m)
  }
  
  message("  ZINB did not converge for '", iv, "'; falling back to NB.")
  MASS::glm.nb(stats::as.formula(f_count), data = data,
               control = stats::glm.control(maxit = 100))
}

message("\nEstimating models...")

m_ol_1 <- fit_ologit("troop_binary",             d_T1)
m_ol_2 <- fit_ologit("ln_max_total_peacekeeper", d_T1)
m_ol_3 <- fit_ologit("logged_pk5years",          d_T1)

m_lg_1 <- fit_logit("troop_binary",             d_T2)
m_lg_2 <- fit_logit("ln_max_total_peacekeeper", d_T2)
m_lg_3 <- fit_logit("logged_pk5years",          d_T2)

m_z_1  <- fit_zinb("troop_binary",             d_T3)
m_z_2  <- fit_zinb("ln_max_total_peacekeeper", d_T3)
m_z_3  <- fit_zinb("logged_pk5years",          d_T3)

message("  Estimation complete.")
message("  \u2713 ZINB inflate: ", paste(INFL_ZINB, collapse = " + "))

# Tables

VAR_LABELS <- c(
  troop_binary             = "Troop Contribution (Binary)",
  ln_max_total_peacekeeper = "Ln(All Peacekeepers)",
  logged_pk5years          = "Logged Troops (5-Year Stock)",
  lag_mid_bin              = "Any MID (t\u22121)",
  ucdp_intrastate          = "UCDP Intrastate Conflict",
  persistent_dispute       = "Persistent Territorial Dispute (Prior 5 Years)",
  ucdp_interstate_5yr      = "Interstate Conflict (Prior 5 Years)",
  democracy_polity2        = "Democracy (Polity2)",
  ln_capability            = "Ln(CINC Capability)",
  trade_openness           = "Trade Openness (% GDP)",
  igo_membership           = "IGO Memberships",
  ln_gdp_cap               = "Ln(GDP per capita)",
  asia                     = "Asia",
  africa                   = "Africa",
  north_america            = "North America",
  south_america            = "South America",
  time                     = "Time",
  time_squared             = "Time\u00b2",
  time_cubed               = "Time\u00b3",
  cum_peace_10pp           = "Cumulative Peace (per 10 pp)",
  high_igo                 = "High IGO Embeddedness (Top Tercile)",
  no_contig                = "No Contiguous Neighbors"
)

label_term <- function(x) {
  out <- unname(VAR_LABELS[x])
  ifelse(is.na(out),
         stringr::str_to_title(stringr::str_replace_all(x, "_", " ")), out)
}

SECTION_HEADERS <- list(
  troop_binary      = "Peacekeeping participation",
  lag_mid_bin       = "Reverse-causality / security environment",
  democracy_polity2 = "Standard controls"
)

SECTION_HEADERS_INFL <- list(
  cum_peace_10pp = "Structural peace indicators",
  high_igo       = "Structural peace indicators",
  no_contig      = "Structural peace indicators"
)

TERM_ORDER_MAIN <- keep_present(c(
  "troop_binary", "ln_max_total_peacekeeper", "logged_pk5years",
  RC, CTRL
))

NOTE_CI   <- "Entries are point estimates [95% CI]. Significance: * p < .10; ** p < .05; *** p < .01."
NOTE_CLUS <- "Inference: cluster-robust SE (GLM/ZINB) or profile-likelihood CI (ordered logit)."
NOTE_REG  <- "Europe is the omitted region category."
NOTE_SUPP <- "All models include region FE and cubic time polynomial; suppressed for compactness (see Appendix)."
NOTE_ZINF <- paste0(
  "The inflation equation models the log-odds of structural (always-zero) status ",
  "using three time-invariant indicators: (1) cumulative peace share (per 10 pp), ",
  "(2) high IGO embeddedness (top tercile of country mean IGO membership), and ",
  "(3) no contiguous neighbors. Positive coefficients indicate a higher probability ",
  "of structural-zero status rather than incidental non-involvement."
)

build_table_df <- function(t1, t2, t3, term_order,
                           scale = c("coef", "or"),
                           sec_headers = SECTION_HEADERS) {
  scale  <- match.arg(scale)
  tlist  <- list(t1, t2, t3)
  
  all_terms <- unique(unlist(lapply(tlist, function(x) x$term)))
  present   <- term_order[term_order %in% all_terms]
  
  rows     <- list()
  prev_sec <- ""
  
  for (trm in present) {
    hdr <- sec_headers[[trm]]
    if (!is.null(hdr) && hdr != prev_sec) {
      rows[[length(rows) + 1]] <- data.frame(
        Variable = hdr, M1 = "", M2 = "", M3 = "", .is_header = TRUE,
        stringsAsFactors = FALSE
      )
      prev_sec <- hdr
    }
    cells <- lapply(tlist, function(td) {
      r <- td[td$term == trm, , drop = FALSE]
      if (nrow(r) == 0) return("\u2014")
      if (scale == "or")
        fmt_cell_or(r$estimate[1], r$conf.low[1], r$conf.high[1], r$p.value[1])
      else
        fmt_cell_ci(r$estimate[1], r$conf.low[1], r$conf.high[1], r$p.value[1],
                    se = r$std.error[1])
    })
    
    rows[[length(rows) + 1]] <- data.frame(
      Variable   = label_term(trm),
      M1         = cells[[1]],
      M2         = cells[[2]],
      M3         = cells[[3]],
      .is_header = FALSE,
      stringsAsFactors = FALSE
    )
  }
  do.call(rbind, rows)
}

add_n_row <- function(df, n_vec) {
  rbind(df, data.frame(
    Variable = "Observations",
    M1 = ifelse(is.na(n_vec[1]), "\u2014", formatC(n_vec[1], format = "d", big.mark = ",")),
    M2 = ifelse(is.na(n_vec[2]), "\u2014", formatC(n_vec[2], format = "d", big.mark = ",")),
    M3 = ifelse(is.na(n_vec[3]), "\u2014", formatC(n_vec[3], format = "d", big.mark = ",")),
    .is_header = FALSE, stringsAsFactors = FALSE
  ))
}

make_flextable <- function(df, title, notes,
                           col_widths = c(2.2, 1.3, 1.3, 1.3)) {
  hdr_rows <- which(as.logical(df$.is_header) == TRUE)
  obs_rows <- which(df$Variable == "Observations")
  df_out   <- df[, c("Variable", "M1", "M2", "M3")]
  
  ft <- flextable::flextable(df_out)
  ft <- flextable::set_header_labels(ft, Variable = " ", M1 = "M1", M2 = "M2", M3 = "M3")
  ft <- flextable::padding(ft, padding = 2, part = "all")
  ft <- flextable::line_spacing(ft, space = 1.0, part = "body")
  ft <- flextable::font(ft, fontname = "Times New Roman", part = "all")
  ft <- flextable::fontsize(ft, size = 10, part = "all")
  ft <- flextable::width(ft, j = 1, width = col_widths[1])
  ft <- flextable::width(ft, j = 2, width = col_widths[2])
  ft <- flextable::width(ft, j = 3, width = col_widths[3])
  ft <- flextable::width(ft, j = 4, width = col_widths[4])
  ft <- flextable::bold(ft, part = "header")
  ft <- flextable::align(ft, j = 1, align = "left", part = "header")
  ft <- flextable::align(ft, j = 2:4, align = "center", part = "header")
  ft <- flextable::align(ft, j = 1, align = "left", part = "body")
  ft <- flextable::align(ft, j = 2:4, align = "center", part = "body")
  
  if (length(hdr_rows) > 0) {
    ft <- flextable::bold(ft,   i = hdr_rows, part = "body")
    ft <- flextable::italic(ft, i = hdr_rows, part = "body")
    ft <- flextable::bg(ft,     i = hdr_rows, bg = "#F2F2F2", part = "body")
  }
  if (length(obs_rows) > 0) ft <- flextable::italic(ft, i = obs_rows, part = "body")
  
  ft <- flextable::border_remove(ft)
  ft <- flextable::hline_top(ft,
                             border = officer::fp_border(color = "black", width = 1.5), part = "header")
  ft <- flextable::hline_bottom(ft,
                                border = officer::fp_border(color = "black", width = 0.75), part = "header")
  ft <- flextable::hline_bottom(ft,
                                border = officer::fp_border(color = "black", width = 1.5), part = "body")
  
  ft <- flextable::add_header_lines(ft, values = title)
  ft <- flextable::bold(ft,     i = 1, part = "header")
  ft <- flextable::fontsize(ft, i = 1, size = 11, part = "header")
  ft <- flextable::align(ft,    i = 1, align = "left", part = "header")
  
  for (note in rev(notes)) ft <- flextable::add_footer_lines(ft, values = note)
  ft <- flextable::italic(ft, part = "footer")
  ft <- flextable::fontsize(ft, size = 9, part = "footer")
  ft <- flextable::align(ft, align = "left", part = "footer")
  ft <- flextable::color(ft, color = "gray40", part = "footer")
  ft
}

# Results

message("\nComputing tidy results (95% CI)...")
message("  Computing ordered logit (polr) SEs — profile CIs may take time...")

t1_1 <- tidy_polr_slopes(m_ol_1)
t1_2 <- tidy_polr_slopes(m_ol_2)
t1_3 <- tidy_polr_slopes(m_ol_3)

t2_1 <- tidy_glm_cl(m_lg_1, d_T2)
t2_2 <- tidy_glm_cl(m_lg_2, d_T2)
t2_3 <- tidy_glm_cl(m_lg_3, d_T2)

t3_1 <- tidy_count_model(m_z_1, d_T3)
t3_2 <- tidy_count_model(m_z_2, d_T3)
t3_3 <- tidy_count_model(m_z_3, d_T3)

ti_1 <- tidy_inflate_model(m_z_1, d_T3)
ti_2 <- tidy_inflate_model(m_z_2, d_T3)
ti_3 <- tidy_inflate_model(m_z_3, d_T3)

N_T1 <- c(model_N(m_ol_1), model_N(m_ol_2), model_N(m_ol_3))
N_T2 <- c(model_N(m_lg_1), model_N(m_lg_2), model_N(m_lg_3))
N_T3 <- c(model_N(m_z_1),  model_N(m_z_2),  model_N(m_z_3))

message("\nBuilding tables...")

ft_T1 <- make_flextable(
  add_n_row(build_table_df(t1_1, t1_2, t1_3, TERM_ORDER_MAIN, scale = "coef"), N_T1),
  title = "Table 1. Peacekeeping Participation and MID Hostility Level (Ordered Logit)",
  notes = c(NOTE_CI, NOTE_CLUS, NOTE_REG, NOTE_SUPP)
)

ft_T2 <- make_flextable(
  add_n_row(build_table_df(t2_1, t2_2, t2_3, TERM_ORDER_MAIN, scale = "or"), N_T2),
  title = "Table 2. Peacekeeping Participation and MID Initiation (Logit, Odds Ratios)",
  notes = c(NOTE_CI, "Reported estimates are odds ratios (exp(\u03b2)).",
            NOTE_CLUS, NOTE_REG, NOTE_SUPP)
)

ft_T3 <- make_flextable(
  add_n_row(build_table_df(t3_1, t3_2, t3_3, TERM_ORDER_MAIN, scale = "coef"), N_T3),
  title = "Table 3. Peacekeeping Participation and MID Frequency (ZINB Count Model)",
  notes = c(NOTE_CI,
            "Count-model coefficients from zero-inflated negative binomial regression.",
            NOTE_CLUS, NOTE_REG, NOTE_SUPP)
)

ft_T3b <- NULL

extract_inflate_direct <- function(model) {
  if (!inherits(model, "zeroinfl")) return(tibble::tibble())
  b <- model$coefficients$zero
  if (is.null(b) || length(b) == 0) return(tibble::tibble())
  se_vec <- tryCatch({
    V   <- stats::vcov(model)
    nc  <- length(model$coefficients$count)
    nz  <- length(model$coefficients$zero)
    idx <- seq(nc + 1, nc + nz)
    idx <- idx[idx <= nrow(V)]
    if (length(idx) == nz) sqrt(pmax(diag(V)[idx], 0)) else rep(NA_real_, nz)
  }, error = function(e) rep(NA_real_, length(b)))
  crit <- stats::qnorm(0.975)
  tibble::tibble(
    term      = names(b),
    estimate  = as.numeric(b),
    std.error = se_vec,
    conf.low  = ifelse(!is.na(se_vec) & se_vec > 0,
                       as.numeric(b) - crit * se_vec, NA_real_),
    conf.high = ifelse(!is.na(se_vec) & se_vec > 0,
                       as.numeric(b) + crit * se_vec, NA_real_),
    p.value   = ifelse(!is.na(se_vec) & se_vec > 0,
                       2 * stats::pnorm(abs(as.numeric(b) / se_vec),
                                        lower.tail = FALSE), NA_real_)
  ) %>% dplyr::filter(term != "(Intercept)")
}

use_ti_1 <- if (nrow(ti_1) > 0) ti_1 else extract_inflate_direct(m_z_1)
use_ti_2 <- if (nrow(ti_2) > 0) ti_2 else extract_inflate_direct(m_z_2)
use_ti_3 <- if (nrow(ti_3) > 0) ti_3 else extract_inflate_direct(m_z_3)

strip_zinb_prefix <- function(td) {
  if (nrow(td) == 0) return(td)
  td$term <- sub("^zero_",  "", td$term)
  td$term <- sub("^count_", "", td$term)
  td
}
use_ti_1 <- strip_zinb_prefix(use_ti_1)
use_ti_2 <- strip_zinb_prefix(use_ti_2)
use_ti_3 <- strip_zinb_prefix(use_ti_3)

all_infl_terms  <- unique(c(use_ti_1$term, use_ti_2$term, use_ti_3$term))
preferred_order <- c("cum_peace_10pp", "high_igo", "no_contig")
INFL_TERM_ORDER <- c(
  preferred_order[preferred_order %in% all_infl_terms],
  setdiff(all_infl_terms, preferred_order)
)

if (length(INFL_TERM_ORDER) > 0) {
  message("  Building Table 3b (inflation equation)...")
  ft_T3b <- make_flextable(
    add_n_row(
      build_table_df(use_ti_1, use_ti_2, use_ti_3, INFL_TERM_ORDER,
                     scale = "coef", sec_headers = SECTION_HEADERS_INFL),
      N_T3
    ),
    title = "Table 3b. ZINB Zero-Inflation Model — Structural Peace Predictors",
    notes = c(NOTE_CI, NOTE_ZINF, NOTE_CLUS)
  )
  message("  Table 3b built: ", paste(INFL_TERM_ORDER, collapse = " + "))
} else {
  message("  Table 3b not produced (no inflation terms recovered).")
}

message("  Tables built.")

# Figures

message("\nGenerating figures (1\u20134 only)...")

make_baseline <- function(data) {
  vars <- unique(c(IVS, RHS_COMMON, INFL_ZINB))
  out  <- list()
  for (v in vars) {
    if (!v %in% names(data)) next
    if (is.numeric(data[[v]])) out[[v]] <- mean(data[[v]], na.rm = TRUE)
    else                       out[[v]] <- mode_value(data[[v]])
  }
  for (r in intersect(REGION, names(data))) out[[r]] <- 0
  as.data.frame(out)
}

make_polr_newdata <- function(model, data, overrides = list()) {
  tt        <- stats::terms(model)
  pred_vars <- all.vars(stats::delete.response(tt))
  base <- list()
  for (v in pred_vars) {
    if (!v %in% names(data)) next
    if (is.numeric(data[[v]])) base[[v]] <- mean(data[[v]], na.rm = TRUE)
    else                       base[[v]] <- mode_value(data[[v]])
  }
  for (nm in names(overrides)) base[[nm]] <- overrides[[nm]]
  as.data.frame(base)
}

nd0 <- make_polr_newdata(m_ol_1, d_T1, list(troop_binary = 0))
nd1 <- make_polr_newdata(m_ol_1, d_T1, list(troop_binary = 1))

pr0 <- stats::predict(m_ol_1, newdata = nd0, type = "probs")
pr1 <- stats::predict(m_ol_1, newdata = nd1, type = "probs")
if (is.matrix(pr0)) { pr0 <- pr0[1, ]; pr1 <- pr1[1, ] }

host_levels <- names(pr0)
name_map    <- c(
  "No Dispute"         = "No Threat",
  "No Military Action" = "No Action",
  "Threat"             = "Threat",
  "Display of Force"   = "Display of Force",
  "Use of Force"       = "Use of Force",
  "War"                = "War"
)

prob_long <- data.frame(
  hostcat  = rep(host_levels, 2),
  scenario = rep(c("No Troops", "Troops Sent"), each = length(host_levels)),
  prob     = c(as.numeric(pr0), as.numeric(pr1)),
  stringsAsFactors = FALSE
)
prob_long$scenario       <- factor(prob_long$scenario, levels = c("No Troops", "Troops Sent"))
prob_long$hostcat_sample <- unname(name_map[prob_long$hostcat])

main_cats <- c("No Threat", "No Action", "Display of Force", "Use of Force")
tw_cats   <- c("Threat", "War")

prob_main <- prob_long[prob_long$hostcat_sample %in% main_cats, ]
prob_main$hostcat_sample <- factor(prob_main$hostcat_sample, levels = main_cats)

prob_tw <- prob_long[prob_long$hostcat_sample %in% tw_cats, ]
prob_tw$hostcat_sample <- factor(prob_tw$hostcat_sample, levels = tw_cats)

ymax_main <- ceiling(max(prob_main$prob, na.rm = TRUE) * 1.10 * 10) / 10
ymax_tw   <- signif(max(prob_tw$prob, na.rm = TRUE) * 1.15, 2)

fill_scale_fixed <- ggplot2::scale_fill_manual(
  breaks = c("No Troops", "Troops Sent"),
  values = c("No Troops" = COL_NOTROOPS, "Troops Sent" = COL_TROOPS),
  drop   = FALSE
)
legend_fixed <- ggplot2::guides(fill = ggplot2::guide_legend(
  nrow = 1, byrow = TRUE,
  keywidth  = ggplot2::unit(1.4, "cm"),
  keyheight = ggplot2::unit(0.7, "cm"),
  override.aes = list(color = "black", linewidth = 0.5)
))
panel_legend_theme <- ggplot2::theme(
  legend.position   = "bottom",
  legend.direction  = "horizontal",
  legend.text       = ggplot2::element_text(size = 14, family = BASE_FAMILY),
  legend.key.size   = ggplot2::unit(0.9, "cm"),
  legend.spacing.x  = ggplot2::unit(0.9, "cm"),
  legend.box.margin = ggplot2::margin(t = 8, b = 0),
  plot.margin       = ggplot2::margin(15, 20, 18, 20),
  panel.grid.major.y = ggplot2::element_line(color = "gray88", linewidth = 0.4,
                                             linetype = "dotted")
)

p1a <- ggplot2::ggplot(prob_main,
                       ggplot2::aes(x = hostcat_sample, y = prob, fill = scenario)) +
  ggplot2::geom_col(position = ggplot2::position_dodge(width = 0.75),
                    width = 0.70, color = "black", linewidth = 0.35) +
  fill_scale_fixed +
  ggplot2::scale_y_continuous(limits = c(0, ymax_main), expand = c(0, 0)) +
  ggplot2::labs(title = "Main Categories", x = NULL, y = "Predicted Probability") +
  theme_pub(12) +
  ggplot2::theme(plot.title  = ggplot2::element_text(face = "bold", hjust = 0.5, size = 16),
                 axis.text.x = ggplot2::element_text(size = 11)) +
  legend_fixed + panel_legend_theme

p1b <- ggplot2::ggplot(prob_tw,
                       ggplot2::aes(x = hostcat_sample, y = prob, fill = scenario)) +
  ggplot2::geom_col(position = ggplot2::position_dodge(width = 0.75),
                    width = 0.70, color = "black", linewidth = 0.35) +
  fill_scale_fixed +
  ggplot2::scale_y_continuous(limits = c(0, ymax_tw), expand = c(0, 0)) +
  ggplot2::labs(title = "Threat and War (Expanded Scale)", x = NULL, y = "Predicted Probability") +
  theme_pub(12) +
  ggplot2::theme(plot.title  = ggplot2::element_text(face = "bold", hjust = 0.5, size = 16),
                 axis.text.x = ggplot2::element_text(size = 11)) +
  legend_fixed + panel_legend_theme

row1 <- cowplot::plot_grid(p1a, p1b, nrow = 1, align = "hv", rel_widths = c(1.2, 0.8))
title1 <- cowplot::ggdraw() +
  cowplot::draw_label("Predicted Probability of MID Hostility by Troop Deployment",
                      fontface = "bold", fontfamily = BASE_FAMILY, size = 18,
                      x = 0.5, hjust = 0.5)
fig1_full <- cowplot::plot_grid(title1, row1, ncol = 1, rel_heights = c(0.12, 1))
save_fig(fig1_full, "Figure_1_Hostility_Probs.png", w = 14, h = 8)

base2 <- make_baseline(d_T2)
nd0   <- base2; nd0$troop_binary <- 0
nd1   <- base2; nd1$troop_binary <- 1
V_lg1 <- vcovCL_safe(m_lg_1, cluster = d_T2$cow)
rhs_lg1 <- stats::delete.response(stats::terms(m_lg_1))
X0   <- stats::model.matrix(rhs_lg1, nd0)
X1   <- stats::model.matrix(rhs_lg1, nd1)
eta0 <- as.numeric(X0 %*% stats::coef(m_lg_1))
eta1 <- as.numeric(X1 %*% stats::coef(m_lg_1))
se0  <- sqrt(as.numeric(X0 %*% V_lg1 %*% t(X0)))
se1  <- sqrt(as.numeric(X1 %*% V_lg1 %*% t(X1)))
crit95 <- stats::qnorm(0.975)

df2 <- data.frame(
  scenario = factor(c("No Troops", "Troops Sent"), levels = c("No Troops", "Troops Sent")),
  prob = c(stats::plogis(eta0), stats::plogis(eta1)),
  lo   = c(stats::plogis(eta0 - crit95 * se0), stats::plogis(eta1 - crit95 * se1)),
  hi   = c(stats::plogis(eta0 + crit95 * se0), stats::plogis(eta1 + crit95 * se1)),
  stringsAsFactors = FALSE
)

fig2 <- ggplot2::ggplot(df2, ggplot2::aes(x = scenario, y = prob, fill = scenario)) +
  ggplot2::geom_col(width = 0.55, alpha = 0.95, color = "black", linewidth = 0.5) +
  ggplot2::geom_errorbar(ggplot2::aes(ymin = lo, ymax = hi),
                         width = 0.12, linewidth = 1.2, color = "black") +
  ggplot2::scale_fill_manual(values = c("No Troops" = COL_NOTROOPS, "Troops Sent" = COL_TROOPS)) +
  ggplot2::scale_y_continuous(limits = c(0, max(df2$hi) * 1.15), expand = c(0, 0),
                              breaks = scales::pretty_breaks(6)) +
  ggplot2::labs(title = "Predicted Probability of MID Initiation",
                subtitle = "By Peacekeeping Troop Deployment (Logit Model)",
                x = "Troop Deployment", y = "Predicted Probability") +
  theme_pub(11) +
  ggplot2::theme(legend.position = "none",
                 panel.grid.major.y = ggplot2::element_line(color = "#E6E6E6", linewidth = 0.3))
save_fig(fig2, "Figure_2_Initiation_Probs.png", w = 8, h = 6)

message("  Generating Figure 3 (Naïve vs CF comparison)...")

SEL_Z <- keep_present(c("neverpk_dummy", RHS_COMMON))

add_lambda_probit <- function(df) {
  f1 <- stats::as.formula(paste0("troop_binary ~ ", paste(SEL_Z, collapse = " + ")))
  m1 <- tryCatch(stats::glm(f1, data = df, family = stats::binomial(link = "probit")),
                 error = function(e) NULL)
  if (is.null(m1)) {
    m1 <- stats::glm(
      stats::as.formula(paste0("troop_binary ~ ",
                               paste(setdiff(SEL_Z, "neverpk_dummy"), collapse = " + "))),
      data = df, family = stats::binomial(link = "probit"))
  }
  xb  <- stats::predict(m1, type = "link")
  p   <- pmin(pmax(stats::pnorm(xb), 1e-6), 1 - 1e-6)
  phi <- stats::dnorm(xb)
  df$lambda_cf <- ifelse(df$troop_binary == 1L, phi / p, -phi / (1 - p))
  list(data = df, stage1 = m1)
}

add_uhat_ols <- function(df, iv) {
  m1 <- stats::lm(stats::as.formula(paste0(iv, " ~ ", paste(SEL_Z, collapse = " + "))),
                  data = df)
  df$u_hat <- stats::residuals(m1)
  list(data = df, stage1 = m1)
}

fit_cf_bundle <- function(iv) {
  if (iv == "troop_binary") {
    d1 <- add_lambda_probit(d_T1)$data
    d2 <- add_lambda_probit(d_T2)$data
    d3 <- add_lambda_probit(d_T3)$data
    cf_term <- "lambda_cf"
  } else {
    d1 <- add_uhat_ols(d_T1, iv)$data
    d2 <- add_uhat_ols(d_T2, iv)$data
    d3 <- add_uhat_ols(d_T3, iv)$data
    cf_term <- "u_hat"
  }
  
  f_count <- paste0("mid_sum ~ ", rhs_str(iv), " + ", cf_term)
  f_zi    <- stats::as.formula(paste0(f_count, " | ", infl_str(INFL_ZINB)))
  
  m_zi <- tryCatch(
    pscl::zeroinfl(f_zi, data = d3, dist = "negbin", link = "logit",
                   control = pscl::zeroinfl.control(maxit = 3000)),
    error = function(e) NULL
  )
  if (is.null(m_zi)) {
    m_zi <- MASS::glm.nb(
      stats::as.formula(paste0("mid_sum ~ ", rhs_str(iv), " + ", cf_term)),
      data = d3, control = stats::glm.control(maxit = 100))
  }
  
  list(
    d1 = d1, d2 = d2, d3 = d3,
    hostlev   = MASS::polr(
      stats::as.formula(paste0("hostlev_f ~ ", rhs_str(iv), " + ", cf_term)),
      data = d1, method = "logistic", Hess = TRUE, model = TRUE),
    initiator = stats::glm(
      stats::as.formula(paste0("initiator ~ ", rhs_str(iv), " + ", cf_term)),
      data = d2, family = stats::binomial("logit")),
    mid_sum   = m_zi
  )
}

cf_1 <- fit_cf_bundle("troop_binary")
cf_2 <- fit_cf_bundle("ln_max_total_peacekeeper")
cf_3 <- fit_cf_bundle("logged_pk5years")

extract_one <- function(model, data, target_term, kind = c("polr", "glm", "count")) {
  kind <- match.arg(kind)
  td <- switch(kind,
               polr  = tidy_polr_robust(model, level = 0.95),
               glm   = tidy_glm_cl(model, data, level = 0.95),
               count = tidy_count_model(model, data, level = 0.95))
  td[td$term == target_term, , drop = FALSE]
}

IV_SHORT  <- c(troop_binary = "Troop Binary",
               ln_max_total_peacekeeper = "Max Logged PK",
               logged_pk5years = "Logged PK 5Y")
OUT_SHORT <- c(hostlev = "Hostility", initiator = "Initiator", mid_sum = "MID Exp.")

build_cmp <- function(iv) {
  n_host <- switch(iv, troop_binary = m_ol_1, ln_max_total_peacekeeper = m_ol_2, logged_pk5years = m_ol_3)
  n_init <- switch(iv, troop_binary = m_lg_1, ln_max_total_peacekeeper = m_lg_2, logged_pk5years = m_lg_3)
  n_cnt  <- switch(iv, troop_binary = m_z_1,  ln_max_total_peacekeeper = m_z_2,  logged_pk5years = m_z_3)
  cf     <- switch(iv, troop_binary = cf_1,   ln_max_total_peacekeeper = cf_2,   logged_pk5years = cf_3)
  ivlab  <- unname(IV_SHORT[iv]) %||% label_term(iv)
  
  dplyr::bind_rows(
    extract_one(n_host,       d_T1, iv, "polr")  %>% dplyr::mutate(outcome = OUT_SHORT["hostlev"],   spec = "Na\u00efve"),
    extract_one(cf$hostlev,   cf$d1, iv, "polr") %>% dplyr::mutate(outcome = OUT_SHORT["hostlev"],   spec = "CF"),
    extract_one(n_init,       d_T2, iv, "glm")   %>% dplyr::mutate(outcome = OUT_SHORT["initiator"], spec = "Na\u00efve"),
    extract_one(cf$initiator, cf$d2, iv, "glm")  %>% dplyr::mutate(outcome = OUT_SHORT["initiator"], spec = "CF"),
    extract_one(n_cnt,        d_T3, iv, "count") %>% dplyr::mutate(outcome = OUT_SHORT["mid_sum"],   spec = "Na\u00efve"),
    extract_one(cf$mid_sum,   cf$d3, iv, "count")%>% dplyr::mutate(outcome = OUT_SHORT["mid_sum"],   spec = "CF")
  ) %>%
    dplyr::mutate(
      iv_label  = ivlab,
      row_label = paste0(outcome, " | ", ivlab),
      spec      = factor(spec, levels = c("Na\u00efve", "CF"))
    )
}

cmp <- dplyr::bind_rows(
  build_cmp("troop_binary"),
  build_cmp("ln_max_total_peacekeeper"),
  build_cmp("logged_pk5years")
)

row_order <- c(paste0("Hostility | ", IV_SHORT),
               paste0("Initiator | ", IV_SHORT),
               paste0("MID Exp. | ",  IV_SHORT))

cmp <- cmp %>%
  dplyr::mutate(
    row_label = factor(row_label, levels = rev(row_order)),
    y         = as.numeric(row_label),
    y_off     = dplyr::if_else(spec == "Na\u00efve", y + 0.15, y - 0.15)
  )

finite_vals <- c(cmp$conf.low[is.finite(cmp$conf.low)],
                 cmp$conf.high[is.finite(cmp$conf.high)],
                 cmp$estimate[is.finite(cmp$estimate)])
xmin4 <- min(finite_vals) * 1.15
xmax4 <- max(max(finite_vals), 0.05) * 1.1

fig3 <- ggplot2::ggplot(cmp, ggplot2::aes(x = estimate, y = y_off)) +
  ggplot2::geom_vline(xintercept = 0, linetype = "solid", linewidth = 0.5, color = "gray40") +
  ggplot2::geom_errorbarh(ggplot2::aes(xmin = conf.low, xmax = conf.high, color = spec),
                          height = 0, linewidth = 1.3, na.rm = TRUE) +
  ggplot2::geom_point(ggplot2::aes(color = spec, shape = spec), size = 4) +
  ggplot2::scale_y_continuous(breaks = seq_along(levels(cmp$row_label)),
                              labels = levels(cmp$row_label), expand = c(0.02, 0.02)) +
  ggplot2::scale_color_manual(values = c("Na\u00efve" = COL_NAIVE, "CF" = COL_CF)) +
  ggplot2::scale_shape_manual(values = c("Na\u00efve" = 16, "CF" = 15)) +
  ggplot2::scale_x_continuous(limits = c(xmin4, xmax4)) +
  ggplot2::labs(title = "Na\u00efve vs. Von Stein Control Function Models",
                x = "Coefficient", y = NULL) +
  theme_pub(11) +
  ggplot2::theme(legend.position = "top", legend.title = ggplot2::element_blank(),
                 panel.grid.major.x = ggplot2::element_line(color = "#F0F0F0", linewidth = 0.3))
save_fig(fig3, "Figure_3_Naive_vs_CF.png", w = 10, h = 7)

message("  Generating Figure 4 (ZINB coefficients)...")

extract_count_beta <- function(model, data, target_term) {
  td <- tidy_count_model(model, data, level = 0.95)
  td[td$term == target_term, , drop = FALSE]
}

zdf <- dplyr::bind_rows(
  extract_count_beta(m_z_1, d_T3, "troop_binary")             %>% dplyr::mutate(iv = "Troop Presence\n(Binary)"),
  extract_count_beta(m_z_3, d_T3, "logged_pk5years")          %>% dplyr::mutate(iv = "Logged PK\n(5-Year Lag)"),
  extract_count_beta(m_z_2, d_T3, "ln_max_total_peacekeeper") %>% dplyr::mutate(iv = "Max Logged PK\n(Total)")
) %>%
  dplyr::mutate(iv = factor(iv, levels = c("Troop Presence\n(Binary)",
                                           "Logged PK\n(5-Year Lag)",
                                           "Max Logged PK\n(Total)")))

fig4 <- ggplot2::ggplot(zdf, ggplot2::aes(x = iv, y = estimate, color = iv)) +
  ggplot2::geom_hline(yintercept = 0, linetype = "dotted", linewidth = 0.6, color = "gray50") +
  ggplot2::geom_errorbar(ggplot2::aes(ymin = conf.low, ymax = conf.high),
                         width = 0.15, linewidth = 1.6) +
  ggplot2::geom_point(size = 6, shape = 21, fill = "white", stroke = 2.2) +
  ggplot2::scale_color_manual(values = c(COL_POINT_1, COL_POINT_2, COL_POINT_3)) +
  ggplot2::labs(title = "ZINB Coefficient Estimates for Peacekeeping Variables",
                x = NULL, y = "Coefficient Estimate (MID Count)",
                caption = "95% Confidence Intervals. All models include full controls.") +
  theme_pub(11) +
  ggplot2::theme(legend.position = "none",
                 panel.grid.major.y = ggplot2::element_line(color = "#F0F0F0", linewidth = 0.3))
save_fig(fig4, "Figure_4_ZINB_Coefs.png", w = 9, h = 6.5)

# Export

message("\nWriting complete DOCX...")

doc_path <- file.path(MAIN_DIR, "PKMID_JCR_Main_Replication.docx")
doc      <- officer::read_docx()

doc <- doc %>%
  officer::body_add_par("Main Manuscript Tables", style = "heading 1") %>%
  officer::body_add_par(paste0(
    "All models are estimated on a country-year panel, 1990–2014. ",
    "This script reads the preprocessed analysis-ready dataset generated by ",
    "00_build_analysis_data.R and reproduces the main manuscript tables and figures. ",
    "All confidence intervals are 95%. ",
    "Cluster-robust standard errors are clustered by country (COW code)."
  ), style = "Normal") %>%
  officer::body_add_par("", style = "Normal")

doc <- flextable::body_add_flextable(doc, ft_T1)
doc <- officer::body_add_par(doc, "", style = "Normal")
doc <- flextable::body_add_flextable(doc, ft_T2)
doc <- officer::body_add_par(doc, "", style = "Normal")
doc <- officer::body_add_break(doc)
doc <- flextable::body_add_flextable(doc, ft_T3)
doc <- officer::body_add_par(doc, "", style = "Normal")
if (!is.null(ft_T3b)) {
  doc <- flextable::body_add_flextable(doc, ft_T3b)
  doc <- officer::body_add_par(doc, "", style = "Normal")
}

doc <- officer::body_add_break(doc)
doc <- officer::body_add_par(doc, "Main Manuscript Figures", style = "heading 1")
for (i in 1:4) {
  fig_file <- Sys.glob(file.path(FIG_DIR, paste0("Figure_", i, "_*.png")))[1]
  if (!is.na(fig_file) && file.exists(fig_file)) {
    doc <- officer::body_add_par(doc, paste0("Figure ", i), style = "heading 2")
    doc <- officer::body_add_img(doc, src = fig_file, width = 6.5, height = 4.5)
    doc <- officer::body_add_par(doc, "", style = "Normal")
  }
}

print(doc, target = doc_path)
message("  Document saved: ", normalizePath(doc_path))

writeLines(capture.output(sessionInfo()),
           con = file.path(AUX_DIR, "sessionInfo_v4.16.txt"))

message("\n", strrep("=", 70))
message("PKMID JCR Main Replication Script v4.16 — complete")
message("ZINB inflation variables:")
message("  cum_peace_10pp (cumulative peace share, per 10 pp)")
message("  high_igo (top-tercile IGO embeddedness)")
message("  no_contig (no contiguous neighbors)")
message("ZINB estimation: direct optimization (maxit = 3000); NB fallback if needed")
message("Analytic samples: loaded from analysis_data.dta")
message("Inference: 95% confidence intervals")
message("SEs: clustered by country for GLM/ZINB; robust ordered-logit inference with fallbacks")
message("Document: ", normalizePath(doc_path))
message(strrep("=", 70))
